Flowchart

library(GENIE3)
library(doParallel)
library(igraph)
library(tidyverse)
library(DT)
library(reticulate)
library(learn2count)
library(rbenchmark)
library(reshape2)
library(gridExtra)
library(DiagrammeR)
library(pROC)
source("dropo.R")
source("generate_adjacency.R")
source("simmetric.R")
source("pscores.R")
source("plotg.R")
source("compare_consensus.R")
source("earlyj.R")
source("plotROC.R")
source("cutoff_adjacency.R")
DiagrammeR::grViz("
digraph biological_workflow {
  # Set up the graph attributes
  graph [layout = dot, rankdir = TB]

  # Define consistent node styles
  node [shape = rectangle, style = filled, color = lightblue, fontsize = 12]

  # Define nodes for each step
  StartNode [label = 'Biological String Regulatory Network', shape = oval, color = forestgreen, fontcolor = black]
  AdjacencyMatrix [label = 'Create Adjacency Matrix', shape = rectangle, color = lightblue]
  SimulateData [label = 'Simulate Single-Cell Data', shape = rectangle, color = lightyellow]

  # Reconstruction using Three Packages
  GENIE3Step [label = 'GENIE3: Calculate Gene Weights', shape = rectangle, color = lightpink]
  GRNBoostStep [label = 'GRNBoost2: Calculate Gene Weights', shape = rectangle, color = lightpink]
  Learn2CountStep [label = 'learn2count: Calculate Gene Weights', shape = rectangle, color = lightpink]

  # Generate Adjacency Matrices for Each Package
  GENIE3Adj [label = 'GENIE3: Generate Adjacency Matrix', shape = rectangle, color = khaki]
  GRNBoostAdj [label = 'GRNBoost2: Generate Adjacency Matrix', shape = rectangle, color = khaki]
  Learn2CountAdj [label = 'learn2count: Generate Adjacency Matrix', shape = rectangle, color = khaki]

  # Symmetrize Step
  Symmetrize [label = 'Symmetrize Adjacency Matrix', shape = rectangle, color = lightyellow]

  # Comparison with Ground Truth
  Compare [label = 'Compare with Ground Truth Adjacency', shape = rectangle, color = salmon]

  # Analysis and Visualization
  Analysis [label = 'Analysis and Visualization', shape = rectangle, color = lightcoral]

  # Define the workflow structure
  StartNode -> AdjacencyMatrix
  AdjacencyMatrix -> SimulateData
  SimulateData -> GENIE3Step
  SimulateData -> GRNBoostStep
  SimulateData -> Learn2CountStep
  GENIE3Step -> GENIE3Adj
  GRNBoostStep -> GRNBoostAdj
  Learn2CountStep -> Learn2CountAdj
  GENIE3Adj -> Symmetrize
  GRNBoostAdj -> Symmetrize
  Learn2CountAdj -> Symmetrize
  Symmetrize -> Compare
  Compare -> Analysis
}
")

Tcell Ground Truth

adjm <- read.table("./../data/adjacency_matrix.csv", header = T, row.names = 1, sep = ",") %>% as.matrix()
gtruth <- igraph::graph_from_adjacency_matrix(adjm, mode = "undirected", diag = F)

num_nodes <- vcount(gtruth)
num_edges <- ecount(gtruth)

set.seed(1234)
plot(gtruth, 
     main = paste("Ground Truth\nNodes:", num_nodes, "Edges:", num_edges),
     vertex.label.color = "black",
     vertex.size = 6, 
     edge.width = 2, 
     vertex.label = NA,
     vertex.color = "steelblue",
     layout = igraph::layout_with_fr)

Simulate Data

ncell <- 500
nodes <- nrow(adjm)

set.seed(1130)
count_matrices <- lapply(1:5, function(i) {
  set.seed(1130 + i)
  count_matrix_i <- simdata(n = ncell, p = nodes, B = adjm, family = "ZINB", 
                            mu = 5, mu_noise = 1, theta = 1, pi = 0.2)
  
  count_matrix_df <- as.data.frame(count_matrix_i)
  colnames(count_matrix_df) <- colnames(adjm)
  rownames(count_matrix_df) <- paste("cell", 1:nrow(count_matrix_df), sep = "")
  
  return(count_matrix_df)
})

count_matrices[[1]] %>%
    datatable(extensions = 'Buttons',
            options = list(
              dom = 'Bfrtip',
              buttons = c('csv', 'excel'),
              scrollX = TRUE,
              pageLength = 10), 
            caption = "Simulated count matrix")
saveRDS(count_matrices, "./../analysis/count_matrices.RDS")

Single matrix

GENIE3

set.seed(1234)
genie3_single <- list()

for (i in seq_along(list(count_matrices[[1]]))) {
  regulatory_network_genie3 <- GENIE3(t(count_matrices[[i]]), nCores=16)
  genie3out <- getLinkList(regulatory_network_genie3)
  genie3_single[[paste0("Matrix_", i)]] <- genie3out
  }

saveRDS(genie3_single, "./../analysis/genie3_single.RDS")

genie3_single[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 output")

Simmetric Output and ROC

sgenie3_single <- simmetric(list(genie3_single[[1]]), weight_function = "mean")
plotROC(sgenie3_single, adjm, plot_title = "ROC curve - GENIE3 Single Matrix")

sgenie3_single[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 simmetric output")

GENIE3 wo nCores

set.seed(1234)
genie3_single <- list()

for (i in seq_along(list(count_matrices[[1]]))) {
  regulatory_network_genie3 <- GENIE3(t(count_matrices[[i]]))
  genie3out <- getLinkList(regulatory_network_genie3)
  genie3_single[[paste0("Matrix_", i)]] <- genie3out
  }

saveRDS(genie3_single, "./../analysis/genie3_single.RDS")

genie3_single[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 output")
sgenie3_single <- simmetric(list(genie3_single[[1]]), weight_function = "mean")
plotROC(sgenie3_single, adjm, plot_title = "ROC curve - GENIE3 Single Matrix")

sgenie3_single[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 simmetric output")

Generate Adjacency and Apply Cutoff

sgenie3_single_wadj <- generate_adjacency(sgenie3_single)
sgenie3_single_adj <- cutoff_adjacency(count_matrices = list(count_matrices[[1]]),
                 weighted_adjm_list = sgenie3_single_wadj, 
                 n = 50,
                 method = "GENIE3")

sgenie3_single_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 weight adjacency")
sgenie3_single_adj$binary_adjm_list[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgenie3_single_adj$binary_adjm_list)

print(scores$Jaccard_Heatmap)

print(scores$Metrics_Barplot)

scores$Metrics_Results %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
#plots <- plotg(sgenie3_single_adj$binary_adjm_list)
ajm_compared <- compare_consensus(sgenie3_single_adj$binary_adjm_list, adjm)

GRNBoost2

use_python("/usr/bin/python3", required = TRUE)
arboreto <- import("arboreto.algo")
pandas <- import("pandas")
numpy <- import("numpy")
set.seed(1234)
grnb_single <- list()

for (i in seq_along(list(count_matrices[[1]]))) {
  count_matrix_df <- as.data.frame(count_matrices[[i]])
  genes <- colnames(count_matrix_df)
  df_pandas <- pandas$DataFrame(data = count_matrix_df, columns = genes, index = rownames(count_matrix_df))
  grn_links <- arboreto$grnboost2(df_pandas, gene_names = genes)
  grnb_single[[i]] <- grn_links
  }

saveRDS(grnb_single, "./../analysis/grnb_single.RDS")

grnb_single[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 output")

Simmetric Output and ROC

sgrnb_single <- simmetric(list(grnb_single[[1]]), weight_function = "mean")
plotROC(sgrnb_single, adjm, plot_title = "ROC curve - GRNBoost2 Single Matrix")

sgrnb_single[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 simmetric output")

Generate Adjacency and Apply Cutoff

sgrnb_single_wadj <- generate_adjacency(sgrnb_single)
sgrnb_single_adj <- cutoff_adjacency(count_matrices = list(count_matrices[[1]]),
                 weighted_adjm_list = sgrnb_single_wadj, 
                 n = 50,
                 method = "GRNBoost2")

sgrnb_single_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 weight adjacency")
sgrnb_single_adj$binary_adjm_list[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgrnb_single_adj$binary_adjm_list)

print(scores$Jaccard_Heatmap)

print(scores$Metrics_Barplot)

scores$Metrics_Results %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
#plots <- plotg(sgrnb3_single_adj$binary_adjm_list)
ajm_compared <- compare_consensus(sgrnb_single_adj$binary_adjm_list, adjm)

Matrices Integration

Early Integration

early_matrix <- list(earlyj(count_matrices))

GENIE3

set.seed(1234)
genie3_early <- list()

for (i in seq_along(list(early_matrix[[1]]))) {
  regulatory_network_genie3 <- GENIE3(t(early_matrix[[i]]), nCores=16)
  genie3out <- getLinkList(regulatory_network_genie3)
  genie3_early[[paste0("Matrix_", i)]] <- genie3out
  }

saveRDS(genie3_early, "./../analysis/genie3_early.RDS")

genie3_early[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 output")

Simmetric Output and ROC

sgenie3_early <- simmetric(list(genie3_early[[1]]), weight_function = "mean")
plotROC(sgenie3_early, adjm, plot_title = "ROC curve - GENIE3 Early Integration")

sgenie3_early[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 simmetric output")

Generate Adjacency and Apply Cutoff

sgenie3_early_wadj <- generate_adjacency(sgenie3_early)
sgenie3_early_adj <- cutoff_adjacency(count_matrices = list(early_matrix[[1]]),
                 weighted_adjm_list = sgenie3_early_wadj, 
                 n = 50,
                 method = "GENIE3")

sgenie3_early_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 weight adjacency")
sgenie3_early_adj$binary_adjm_list[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgenie3_early_adj$binary_adjm_list)

print(scores$Jaccard_Heatmap)

print(scores$Metrics_Barplot)

scores$Metrics_Results %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
#plots <- plotg(sgenie3_single_adj$binary_adjm_list)
ajm_compared <- compare_consensus(sgenie3_early_adj$binary_adjm_list, adjm)

GRNBoost2

set.seed(1234)
grnb_early <- list()

for (i in seq_along(list(early_matrix[[1]]))) {
  count_matrix_df <- as.data.frame(early_matrix[[i]])
  genes <- colnames(count_matrix_df)
  df_pandas <- pandas$DataFrame(data = count_matrix_df, columns = genes, index = rownames(count_matrix_df))
  grn_links <- arboreto$grnboost2(df_pandas, gene_names = genes)
  grnb_early[[i]] <- grn_links
  }

saveRDS(grnb_early, "./../analysis/grnb_early.RDS")

grnb_early[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 output")

Simmetric Output and ROC

sgrnb_early <- simmetric(list(grnb_early[[1]]), weight_function = "mean")
plotROC(sgrnb_early, adjm, plot_title = "ROC curve - GRNBoost2 Early Integration")

sgrnb_early[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 simmetric output")

Generate Adjacency and Apply Cutoff

sgrnb_early_wadj <- generate_adjacency(sgrnb_early)
sgrnb_early_adj <- cutoff_adjacency(count_matrices = list(early_matrix[[1]]),
                 weighted_adjm_list = sgrnb_early_wadj, 
                 n = 50,
                 method = "GRNBoost2")

sgrnb_early_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 weight adjacency")
sgrnb_early_adj$binary_adjm_list[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgrnb_early_adj$binary_adjm_list)

print(scores$Jaccard_Heatmap)

print(scores$Metrics_Barplot)

scores$Metrics_Results %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
#plots <- plotg(sgrnb3_single_adj$binary_adjm_list)
ajm_compared <- compare_consensus(sgrnb_early_adj$binary_adjm_list, adjm)

Late Integration

GENIE3

set.seed(1234)
genie3_late <- list()

for (i in seq_along(count_matrices)) {
  regulatory_network_genie3 <- GENIE3(t(count_matrices[[i]]), nCores=16)
  genie3out <- getLinkList(regulatory_network_genie3)
  genie3_late[[paste0("Matrix_", i)]] <- genie3out
  }

saveRDS(genie3_late, "./../analysis/genie3_late.RDS")

genie3_late[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 output")

Simmetric Output and ROC

sgenie3_late <- simmetric(genie3_late, weight_function = "mean")
plotROC(sgenie3_late, adjm, plot_title = "ROC curve - GENIE3 Late Integration")

sgenie3_late[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 simmetric output")

Generate Adjacency and Apply Cutoff

sgenie3_late_wadj <- generate_adjacency(sgenie3_late)
sgenie3_late_adj <- cutoff_adjacency(count_matrices = count_matrices,
                 weighted_adjm_list = sgenie3_late_wadj, 
                 n = 5,
                 method = "GENIE3")

sgenie3_late_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 weight adjacency")
sgenie3_late_adj$binary_adjm_list[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgenie3_late_adj$binary_adjm_list)

print(scores$Jaccard_Heatmap)

print(scores$Metrics_Barplot)

scores$Metrics_Results %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgenie3_late_adj$binary_adjm_list)

ajm_compared <- compare_consensus(sgenie3_late_adj$binary_adjm_list, adjm)

GRNBoost2

set.seed(1234)
grnb_late <- list()

for (i in seq_along(count_matrices)) {
  count_matrix_df <- as.data.frame(count_matrices[[i]])
  genes <- colnames(count_matrix_df)
  df_pandas <- pandas$DataFrame(data = count_matrix_df, columns = genes, index = rownames(count_matrix_df))
  grn_links <- arboreto$grnboost2(df_pandas, gene_names = genes)
  grnb_late[[i]] <- grn_links
  }

saveRDS(grnb_late, "./../analysis/grnb_late.RDS")

grnb_late[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 output")

Simmetric Output and ROC

sgrnb_late <- simmetric(grnb_late, weight_function = "mean")
plotROC(sgrnb_late, adjm, plot_title = "ROC curve - GRNBoost2 Late Integration")

sgrnb_late[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 simmetric output")

Generate Adjacency and Apply Cutoff

sgrnb_late_wadj <- generate_adjacency(sgrnb_late)
sgrnb_late_adj <- cutoff_adjacency(count_matrices = count_matrices,
                 weighted_adjm_list = sgrnb_late_wadj, 
                 n = 5,
                 method = "GRNBoost2")

sgrnb_late_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 weight adjacency")
sgrnb_late_adj$binary_adjm_list[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgrnb_late_adj$binary_adjm_list)

print(scores$Jaccard_Heatmap)

print(scores$Metrics_Barplot)

scores$Metrics_Results %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgrnb_late_adj$binary_adjm_list)

ajm_compared <- compare_consensus(sgrnb_late_adj$binary_adjm_list, adjm)

Joint Integration